### Set working directory, change accordingly
setwd("~/Documents/Research/GOV_paper")

### Load the data
# load() returns the names of the objects it restored. Every model below
# reads an object called 'dat', so stop right away with a clear message if
# the file does not provide it (instead of failing later inside a model call).
if (!"dat" %in% load("data_weiler_etal_gov.Rdata")) {
  stop("'data_weiler_etal_gov.Rdata' does not contain an object named 'dat'",
       call. = FALSE)
}

### Codebook (variable name - short description)
# 'admin_seats' - number of administrative seats
# 'legis_seats' - number of legislative seats
# 'cumulative_measure1' - aggregate access measure, weighted
# 'cumulative_measure2' - aggregate access measure, not weighted
# 'employees_log' - number of employees (logged)
# 'group_type' - type of interest group (citizen group, economic group, other)
# 'age_log' - age of group in years (logged)
# 'imp_nat' - importance of national level for lobbying
# 'pol_comp' - strength of competition in field of activity
# 'pol_act' - intensity of political activity of group
# 'ind_memb' - number of individual members 
# 'corp_memb' - number of corporate members 
# 'extra_measure' - additional measure of access described in appendix

### Load required packages (may be necessary to download them first)
library(pscl)
library(VGAM)


### Run the main models of Tables 1 and 2

## Model 1: admin_seats (selection and allocation stages)
# Note: the objects fitted for Model 1 carry the suffix "2".
# All three fits are restricted to rows with duplicate == 1.

# Both stages at once: hurdle model with a binomial zero part and a
# Poisson count part
mod.hurdle2 <- hurdle(
  admin_seats ~ employees_log + group_type + age_log + imp_nat + pol_comp +
    pol_act + corp_memb + ind_memb,
  data = dat,
  dist = "poisson",
  zero.dist = "binomial",
  subset = duplicate == 1
)
summary(mod.hurdle2)

# Selection stage on its own: binomial GLM for admin_seats > 0
bin.mod2 <- glm(
  I(admin_seats > 0) ~ employees_log + group_type + age_log + imp_nat +
    pol_comp + pol_act + corp_memb + ind_memb,
  data = dat,
  family = "binomial",
  subset = duplicate == 1
)
summary(bin.mod2)

# Allocation stage on its own: positive-Poisson model, fitted only to
# rows with at least one seat
count.mod2 <- vglm(
  admin_seats ~ employees_log + group_type + age_log + imp_nat + pol_comp +
    pol_act + corp_memb + ind_memb,
  data = dat,
  family = pospoisson(),
  subset = admin_seats > 0 & duplicate == 1
)
summary(count.mod2)


### Model 2: legis_seats (selection and allocation stages)
# Note: the objects fitted for Model 2 carry the suffix "1".
# All three fits are restricted to rows with duplicate == 1.

# Both stages at once: hurdle model with a binomial zero part and a
# Poisson count part
mod.hurdle1 <- hurdle(
  legis_seats ~ employees_log + group_type + age_log + imp_nat + pol_comp +
    pol_act + corp_memb + ind_memb,
  data = dat,
  dist = "poisson",
  zero.dist = "binomial",
  subset = duplicate == 1
)
summary(mod.hurdle1)

# Selection stage on its own: binomial GLM for legis_seats > 0
bin.mod1 <- glm(
  I(legis_seats > 0) ~ employees_log + group_type + age_log + imp_nat +
    pol_comp + pol_act + corp_memb + ind_memb,
  data = dat,
  family = "binomial",
  subset = duplicate == 1
)
summary(bin.mod1)

# Allocation stage on its own: positive-Poisson model, fitted only to
# rows with at least one seat
count.mod1 <- vglm(
  legis_seats ~ employees_log + group_type + age_log + imp_nat + pol_comp +
    pol_act + corp_memb + ind_memb,
  data = dat,
  family = pospoisson(),
  subset = legis_seats > 0 & duplicate == 1
)
summary(count.mod1)


### Model 3: cumulative_measure1 (selection and allocation stages)
# No canned version available (DV has non-integers), stages modelled separately

# First stage (selection): binomial GLM for cumulative_measure1 > 0,
# restricted to rows with duplicate == 1
bin.mod3 <- glm(
  I(cumulative_measure1 > 0) ~ employees_log + group_type + age_log +
    imp_nat + pol_comp + pol_act + corp_memb + ind_memb,
  data = dat,
  family = "binomial",
  subset = duplicate == 1
)
summary(bin.mod3)

# Second stage (allocation): Gamma GLM with log link on the strictly positive
# values of the measure.
# The subset now also applies duplicate == 1, so that the allocation stage is
# estimated on the same de-duplicated sample as the selection stage above and
# as every other model in this script (previously only
# cumulative_measure1 > 0 was applied here).
count.mod3 <- glm(
  cumulative_measure1 ~ employees_log + group_type + age_log + imp_nat +
    pol_comp + pol_act + corp_memb + ind_memb,
  family = Gamma(link = "log"),
  data = dat,
  subset = cumulative_measure1 > 0 & duplicate == 1
)
summary(count.mod3)


### Model 4: cumulative_measure2 (selection and allocation stages)
# All three fits are restricted to rows with duplicate == 1.

# Both stages at once: hurdle model with a binomial zero part and a
# Poisson count part
mod.hurdle4 <- hurdle(
  cumulative_measure2 ~ employees_log + group_type + age_log + imp_nat +
    pol_comp + pol_act + corp_memb + ind_memb,
  data = dat,
  dist = "poisson",
  zero.dist = "binomial",
  subset = duplicate == 1
)
summary(mod.hurdle4)

# Selection stage on its own: binomial GLM for cumulative_measure2 > 0
bin.mod4 <- glm(
  I(cumulative_measure2 > 0) ~ employees_log + group_type + age_log +
    imp_nat + pol_comp + pol_act + corp_memb + ind_memb,
  data = dat,
  family = "binomial",
  subset = duplicate == 1
)
summary(bin.mod4)

# Allocation stage on its own: positive-Poisson model, fitted only to
# rows with a positive value of the measure
count.mod4 <- vglm(
  cumulative_measure2 ~ employees_log + group_type + age_log + imp_nat +
    pol_comp + pol_act + corp_memb + ind_memb,
  data = dat,
  family = pospoisson(),
  subset = cumulative_measure2 > 0 & duplicate == 1
)
summary(count.mod4)



## Models from Table A3 in the appendix (outcome: extra_measure)

# First stage model (selection): binomial GLM for extra_measure > 0,
# restricted to rows with duplicate == 1
bin.mod5 <- glm(
  I(extra_measure > 0) ~ employees_log + group_type + age_log + imp_nat +
    pol_comp + pol_act + corp_memb + ind_memb,
  data = dat,
  family = "binomial",
  subset = duplicate == 1
)
summary(bin.mod5)

# Second stage model (allocation): Gamma GLM with log link.
# The subset conditions on the outcome actually being modelled
# (extra_measure > 0) rather than on cumulative_measure1 > 0: the Gamma family
# needs a strictly positive response, and the allocation stage must match the
# selection stage above. duplicate == 1 is applied as well, consistent with
# the other models in this script.
count.mod5 <- glm(
  extra_measure ~ employees_log + group_type + age_log + imp_nat + pol_comp +
    pol_act + corp_memb + ind_memb,
  family = Gamma(link = "log"),
  data = dat,
  subset = extra_measure > 0 & duplicate == 1
)
summary(count.mod5)
